In [1]:
'''
FOR THE REVIEWER

Thanks for taking the time to review my project!

Here are a few notes:

1. I am not done and have not gotten to my central conclusion
2. I have finished:
    a. County Policy
    b. Cumulative sightings per state
    c. Sighting delta percentage change per month by state
    
3. I plan to do graphs (b) and (c) for counties, then I will correlate graph (a). Most of my graphs' "findings"
can be discounted for now because they are on the state level, making the scale too big.

'''
Out[1]:
'\nFOR THE REVIEWER\n\nThanks for taking the time to review my project!\n\nHere are a few notes:\n\n1. I am not done and have not gotten to my central conclusion\n2. I have finished:\n    a. County Policy\n    b. Cumulative sightings per state\n    c. Sighting delta percentage change per month by state\n    \n3. I plan to do graphs (b) and (c) for counties, then I will correlate graph (a). Most of my graphs\' "findings"\ncan be discounted for now because they are on the state level, making the scale too big.\n\n'
In [2]:
#Calculate an arbitrary "score" based on policy by county


from urllib.request import urlopen
import json
import plotly.graph_objects as go
import pandas as pd
import math
import numpy as np


# Download plotly's US county boundary GeoJSON once; the county choropleths
# below pass it as their `geojson` argument.
# NOTE(review): feature ids are presumably 5-digit county FIPS strings - confirm.
counties_geojson_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(counties_geojson_url) as response:
    counties = json.loads(response.read())


# Load the county policy table. FIPS is forced to str so codes are not parsed
# as numbers.
df = pd.read_csv("data/County_Declaration_and_Policies.csv", dtype={"FIPS": str})
# Blank out missing cells so that "no policy recorded" is falsy in the
# truthiness checks below.
df = df.replace(np.nan, '', regex=True)

# Left-pad every FIPS code to the full 5 characters. The previous loop added a
# single "0" and started at row 1, so the first county was never padded or
# scored.
df['FIPS'] = df['FIPS'].astype(str).str.zfill(5)

# Same truthiness test the original per-row `if` used: any non-empty cell
# counts as "policy present".
has_declaration = df['County Emergency Declaration'].map(bool)
has_restriction = (
    df['Business Closure Policy'].map(bool) | df['Safer-at-Home Policy'].map(bool)
)

# policy z: +1 for an emergency declaration, +1 for a business closure OR a
# safer-at-home policy (so the score is 0.0, 1.0 or 2.0 for every row).
policyZ = has_declaration.to_numpy(dtype=float) + has_restriction.to_numpy(dtype=float)

df['PolicyZ'] = policyZ


'''
policy z is calculated based on these metrics:

+1 to state of emergency
+1 to business closure or stay at home
'''
Out[2]:
'\npolicy z is calculated based on these metrics:\n\n+1 to state of emergency\n+1 to business closure or stay at home\n'
In [3]:
#Plot counties with policy Z
# One county-level layer: each FIPS code in df is coloured by its PolicyZ score.
policy_layer = go.Choroplethmapbox(
    geojson=counties,
    locations=df['FIPS'],
    z=df['PolicyZ'],
    colorscale="blues",
    marker_opacity=0.75,
    marker_line_width=0,
)

fig = go.Figure(policy_layer)

# Base map style, initial view (lat/lon centre and zoom) and zero margins,
# all applied in a single layout update.
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=3,
    mapbox_center={"lat": 37.0902, "lon": -95.7129},
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()


#disregard the bar on the right
In [4]:
#sanitize DF

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.cluster import KMeans
import math
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import datetime
from tqdm import tqdm
import reverse_geocoder as rg
from urllib.request import urlopen
import json
import us

# Read the tab-separated eBird export by hand: every line is split on tabs and
# its final field is dropped.
# NOTE(review): presumably each line ends with a trailing tab, so the dropped
# field only holds the newline - confirm against the file.
birdData = []
#large
with open('data/ebd_US_grhowl_201901_202006_relMay-2020.txt') as f:
#with open('data/ebd_US-AL-101_201801_201801_relMay-2018.txt') as f:
    for line in f:
        birdData.append(line.split('\t')[:-1])

# The first parsed line is the header row; promote it to column names.
df = pd.DataFrame(birdData)
header = df.iloc[0]
df = df[1:]
df.columns=header

# Keep only rows that have a county code and are not in Alaska, then renumber
# the rows from 0.
df = df[(df['COUNTY CODE'] != "") & (df['STATE'] != "Alaska")].reset_index(drop=True)

# Counts arrive as strings; 'X' is mapped to 0 before the integer cast.
# Assign the result back explicitly: calling replace(..., inplace=True) on
# df['OBSERVATION COUNT'] is a chained assignment that does not update df under
# pandas Copy-on-Write, leaving 'X' in place so that astype(int) fails.
# NOTE(review): 'X' presumably means "present but not counted"; mapping it to 0
# means those checklists add nothing to the sums below - confirm that is intended.
df['OBSERVATION COUNT'] = df['OBSERVATION COUNT'].replace('X', '0').astype(int)

df['OBSERVATION DATE'] = pd.to_datetime(df['OBSERVATION DATE'])

# Drop the first three characters of the state code (output elsewhere shows
# two-letter codes such as "AL" after this step).
df['STATE CODE'] = df['STATE CODE'].str[3:]

def getFips(code):
    """Convert a county code string into a state-FIPS + county-suffix string.

    Characters 3-4 of ``code`` are looked up with ``us.states.lookup`` to get
    the state's FIPS prefix; everything from character 6 onward is appended
    unchanged. (Presumably the input looks like ``"US-CA-077"`` and the result
    like ``"06077"`` - confirm against the data.)

    Args:
        code: County code string with the state abbreviation at positions 3-4
            and the county part starting at position 6.

    Returns:
        The state's FIPS code concatenated with the county part of ``code``.

    Raises:
        ValueError: If the state abbreviation is not recognised by
            ``us.states.lookup`` (previously this surfaced as an opaque
            ``AttributeError`` on ``None``).
    """
    state_abbr = code[3:5]
    county_part = code[6:]

    state = us.states.lookup(state_abbr)
    if state is None:
        raise ValueError(
            f"Unrecognised state abbreviation {state_abbr!r} in county code {code!r}"
        )
    return state.fips + county_part


# Rewrite every county code through getFips, then echo the column as a quick
# sanity check of the converted values.
df['COUNTY CODE'] = df['COUNTY CODE'].map(getFips)

print(df['COUNTY CODE'])

'''
def getFip(fip):
    fip = str(fip[0][0] + 1)
    if len(fip) < 2:
        fip = "0" + fip
    return fip

state_abbrev = df.sort_values(by = ['STATE CODE'])['STATE CODE'].unique()
state_fip = [getFip(np.where(state_abbrev == fip)) for fip in state_abbrev]
print(state_fip)
'''
0         06077
1         04019
2         25017
3         49007
4         34001
          ...  
155965    06053
155966    49057
155967    30029
155968    06079
155969    30031
Name: COUNTY CODE, Length: 155970, dtype: object
Out[4]:
'\ndef getFip(fip):\n    fip = str(fip[0][0] + 1)\n    if len(fip) < 2:\n        fip = "0" + fip\n    return fip\n\nstate_abbrev = df.sort_values(by = [\'STATE CODE\'])[\'STATE CODE\'].unique()\nstate_fip = [getFip(np.where(state_abbrev == fip)) for fip in state_abbrev]\nprint(state_fip)\n'
In [5]:
#bird count by state and month
count_by_state_month = (
    df.groupby([df['OBSERVATION DATE'].dt.to_period('M'), 'STATE CODE'])['OBSERVATION COUNT']
    .sum()
    .to_frame()
    .reset_index()
)

# Fractional change in sightings versus the previous row *of the same state*.
# The change must be computed per state: a plain pct_change() over the sorted
# column compares each state's first month with the previous state's last
# month, which is only hidden when that first month happens to be 01-2019.
# The result is index-aligned, so it lands on the right rows of the unsorted frame.
# NOTE(review): if a state has no sightings in some month, the delta is taken
# against the previous month that does have data, not the previous calendar month.
count_by_state_month['DELTA'] = (
    count_by_state_month
    .sort_values(by=['STATE CODE', 'OBSERVATION DATE'])
    .groupby('STATE CODE')['OBSERVATION COUNT']
    .pct_change()
)

#separate "deltas" frame without 01-2019 (the first month has no previous month to compare to)
deltas_by_state_month = count_by_state_month[
    count_by_state_month['OBSERVATION DATE'] != '2019-01'
].copy()
print(deltas_by_state_month)
    OBSERVATION DATE STATE CODE  OBSERVATION COUNT     DELTA
49           2019-02         AL                 49 -0.478723
50           2019-02         AR                 26 -0.277778
51           2019-02         AZ                546 -0.149533
52           2019-02         CA               1873 -0.351903
53           2019-02         CO                692 -0.124051
..               ...        ...                ...       ...
818          2020-05         VT                 87  0.500000
819          2020-05         WA                906 -0.037194
820          2020-05         WI                647 -0.158648
821          2020-05         WV                 19 -0.387097
822          2020-05         WY                254  2.386667

[774 rows x 4 columns]
In [6]:
#total sightings per state, summed over every month in count_by_state_month
count_by_state = (
    count_by_state_month
    .groupby('STATE CODE')['OBSERVATION COUNT']
    .sum()
    .reset_index()
)

# One state-level choropleth layer coloured by the summed counts.
state_totals_layer = go.Choropleth(
    locations=count_by_state['STATE CODE'],
    z=count_by_state['OBSERVATION COUNT'].astype(float),
    locationmode='USA-states',
    colorscale='blues',
    colorbar_title="Owls Sighted",
)

fig = go.Figure(data=state_totals_layer)
fig.update_layout(
    title_text='(01-2019)-(05-2020) Great Horned Owl Sightings by State',
    geo_scope='usa',
)

fig.show()
In [7]:
#sightings by state per month (one choropleth trace per month, selected with a slider)

months = deltas_by_state_month['OBSERVATION DATE'].unique()
slider_title = "Great Horned Owl sightings per Month by State"

fig = go.Figure()

for idx, month in enumerate(months):
    this_month = deltas_by_state_month.loc[deltas_by_state_month['OBSERVATION DATE'] == month]
    fig.add_trace(
        go.Choropleth(
            # Only the first month starts visible. Without an explicit
            # visible=False on the others, every month is drawn on top of each
            # other until the slider is moved for the first time.
            visible=(idx == 0),
            hoverinfo='text',
            text= this_month['OBSERVATION COUNT'],
            locations=this_month['STATE CODE'],
            z = this_month['OBSERVATION COUNT'].astype(int),
            locationmode = 'USA-states',
            colorscale = 'darkmint',
            colorbar_title = "Owls Sighted",
        )
    )

# One slider step per month: show exactly that month's trace and hide the rest.
steps = []
for idx, month in enumerate(months):
    visibility = [False] * len(fig.data)
    visibility[idx] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": slider_title}],
        label=str(month)
    ))

sliders = [dict(
    active=0,
    pad={"t": 50},
    steps=steps,
    transition={'duration': 1000, 'easing': 'cubic-in-out'},
)]

fig.update_layout(
    # Set the title up front too; the slider steps only apply it after a move.
    title_text=slider_title,
    sliders=sliders,
    geo_scope='usa',
)

fig.show()
In [8]:
#percent change in sightings per month by state (one choropleth trace per month, selected with a slider)

months = deltas_by_state_month['OBSERVATION DATE'].unique()
slider_title = "Delta Percent change in bird sightings per Month by State"

fig = go.Figure()

for idx, month in enumerate(months):
    this_month = deltas_by_state_month.loc[deltas_by_state_month['OBSERVATION DATE'] == month]
    fig.add_trace(
        go.Choropleth(
            # Only the first month starts visible. Without an explicit
            # visible=False on the others, every month is drawn on top of each
            # other until the slider is moved for the first time.
            visible=(idx == 0),
            hoverinfo='text',
            # DELTA is a fraction; scale to percent for both hover text and colour.
            text= this_month['DELTA']*100,
            locations=this_month['STATE CODE'],
            z = this_month['DELTA'].astype(float)*100,
            locationmode = 'USA-states',
            colorscale = 'BrBg',
            # Fixed, symmetric colour range so months are comparable and 0%
            # sits at the colour midpoint.
            zmid=0,
            zmin=-100,
            zmax=100,
            colorbar_title = "Percent Change",
        )
    )

# One slider step per month: show exactly that month's trace and hide the rest.
steps = []
for idx, month in enumerate(months):
    visibility = [False] * len(fig.data)
    visibility[idx] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": slider_title}],
        label=str(month)
    ))

sliders = [dict(
    active=0,
    pad={"t": 50},
    steps=steps,
    transition={'duration': 1000, 'easing': 'cubic-in-out'},
)]

fig.update_layout(
    # Set the title up front too; the slider steps only apply it after a move.
    title_text=slider_title,
    sliders=sliders,
    geo_scope='usa',
)

fig.show()
In [9]:
#bird count by county and month
count_by_county_month = (
    df.groupby([df['OBSERVATION DATE'].dt.to_period('M'), 'COUNTY CODE'])['OBSERVATION COUNT']
    .sum()
    .to_frame()
    .reset_index()
)

# Fractional change in sightings versus the previous row *of the same county*.
# The change must be computed per county: a plain pct_change() over the sorted
# column compares each county's first month with the previous county's last
# month, and any county without a 01-2019 row keeps that bogus value after the
# filter below.
# The result is index-aligned, so it lands on the right rows of the unsorted frame.
# NOTE(review): if a county has no sightings in some month, the delta is taken
# against the previous month that does have data, not the previous calendar month.
count_by_county_month['DELTA'] = (
    count_by_county_month
    .sort_values(by=['COUNTY CODE', 'OBSERVATION DATE'])
    .groupby('COUNTY CODE')['OBSERVATION COUNT']
    .pct_change()
)

#separate "deltas" frame without 01-2019 (the first month has no previous month to compare to)
deltas_by_county_month = count_by_county_month[
    count_by_county_month['OBSERVATION DATE'] != '2019-01'
].copy()
print(deltas_by_county_month)
      OBSERVATION DATE COUNTY CODE  OBSERVATION COUNT     DELTA
1243           2019-02       01003                 16  0.000000
1244           2019-02       01005                  6 -0.700000
1245           2019-02       01011                  3  0.500000
1246           2019-02       01033                  1  0.000000
1247           2019-02       01035                  1  0.000000
...                ...         ...                ...       ...
19368          2020-05       56033                 59  3.538462
19369          2020-05       56035                  1  0.000000
19370          2020-05       56037                  4  1.000000
19371          2020-05       56039                 23  2.833333
19372          2020-05       56041                  1  0.000000

[18130 rows x 4 columns]
In [10]:
#total sightings per county, summed over every month in count_by_county_month
count_by_county = (
    count_by_county_month
    .groupby('COUNTY CODE')['OBSERVATION COUNT']
    .sum()
    .reset_index()
)

# County-level layer; the colour range is clamped to 0-500.
county_totals_layer = go.Choroplethmapbox(
    geojson=counties,
    locations=count_by_county['COUNTY CODE'].unique(),
    z=count_by_county['OBSERVATION COUNT'],
    colorscale="blues",
    marker_opacity=0.75,
    marker_line_width=0,
    zmax=500,
    zmin=0,
)

fig = go.Figure(county_totals_layer)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=3,
    mapbox_center={"lat": 37.0902, "lon": -95.7129},
    margin={"r":0,"t":0,"l":0,"b":0},
)

# The layout has a zero top margin, so the caption is printed above the map.
print("(01-2019)-(05-2020) Great Horned Owl Sightings by County")
fig.show()
(01-2019)-(05-2020) Great Horned Owl Sightings by County
In [ ]: